* Set current directory and open log
cd "E:\Seagate Sync\VOL\Personal folder\Documents\Research\Current projects\Working party meetings\Posted\EUP 160404"
capture log close
log using "Data management\wpar-management08-merge", replace text


**************************************************************************
* Merge and consistency checks of working party meeting and lifecycle data
**************************************************************************


* Project and author information
********************************
* Programme:	wpar-management08-merge.do
* Date:			4 April 2016
* Author:		Frank Haege, University of Limerick
* Contact:		frank.haege@ul.ie 

* Do-file description
*********************
* This do-file merges the meeting data with the lifecycle information, performs consistency checks,
* makes corrections (especially distinguishing successive WPs with similar names by the time period of 
* their existence), and re-merges the corrected meeting data with the lifecycle data.


* Clear memory  
**************
version 12.0
clear all
macro drop _all
set more off


* Load data
use "Data management\wpar-management06-workingparties03-merge.dta", clear
des
* 72,292 observations

* Rename WP ID variable
rename wpcode wpid


* Merge with working party lifecycle data
*****************************************

* Merge with initial working party information
rename wpname wpname_fr
merge m:1 wpid using "Data management\wpar-management07-wplife-prepare02.dta"
rename _merge merge1
tab merge1, m
* 2,255 observations cannot be merged with working party
* 2,179 from meeting data
* 76 from lifecycle data

* Consistency check
tab merge1 meets, m
list date birth wpid wpname if meets == 1 & merge1 == 2
list date birth wpid wpname if meets == 0 & merge1 == 3
groups wpid wpname if merge1 == 3 & meets == 0
* Out of 76 working parties that could not be merged, 
* 63 miss any meetings from the outset, the other 13 working parties can only be merged 
* after distinguishing groups by meeting date
* One working party (K08u), which indicates no meeting, is going to be corrected below 

* Drop non-matched group observations from lifecycle data
* (This either happens because there were no recorded meetings
* or because the group id refers to a successor working party
sort wpid
groups wpid wpname meets succwp succwp2 if merge1 == 2, order(h) show(f) miss
groups wpid wpname meets succwp succwp2 if merge1 == 2 & meets == 0, order(h) show(f) miss
groups wpid wpname meets succwp succwp2 if merge1 == 2 & meets == 1, order(h) show(f) miss
drop if merge1 == 2
* 76 observations dropped


**************************************************************************** 
* Correct working party ID variable based on information from lifecycle data
****************************************************************************

* Generate corrected WP ID variable
generate wpid1 = wpid

* Generate variable indicating consistency after check and corrections if necessary
generate consist = 0


* Check WPs that have survived throughout the study period
**********************************************************

* WPs that survived throughout
tab wpid if birth == . & death == .
* 51,315 observations
replace consist = 1 if birth == . & death == .


* Check WPs with unknow pre-1999 establishment date but with abolishment date
******************************************************************************

* 1. WPs that were created before 1999 but were abolished afterwards
tab wpid if birth == . & death != .
* 9,000 observations

* Observations with meetings before abolishment date
tab wpid if birth == . & death != . & date < death
* 8,221 observations
replace consist = 1 if birth == . & death != . & date < death

* Observations with meetings after abolishment date
tab wpid if birth == . & death != . & date >= death
* 779 observations

* 1.1 Observations with meetings after abolishment date and no successor WP
tab wpid if birth == . & death != . & date >= death & s1id == ""
list wpid wpname_fr wpname death date if birth == . & death != . & date >= death & s1id == ""
* 58 meetings after the abolishment date, mostly sub-groups, they should not exist anymore
* at this point in time, but actual practice seems to differ from formal structure
* No correction required (or rather possible)
replace consist = 1 if birth == . & death != . & date >= death & s1id == ""

* 1.2 Observations with meeting after abolishment date, but with successor WP
tab wpid if birth == . & death != . & date >= death & s1id != ""
list wpid wpname_fr s1name death date if birth == . & death != . & date >= death & s1id != "", noobs noh
* 721 observations

* 1.2.1 Observations with meetings after abolishment date, with successor WP,
* and successor WP replaces previous WP without delay
tab wpid if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax
list wpid wpname_fr s1name death date if /*
	*/ birth == . & death != . & date >= death & s1id != "" & death == s1birthmax
* 642 observations

* 1.2.1.1 Observations with meetings after abolishment date, with successor WP,
* successor WP replaces previous WP without delay, and there is no second successor
tab wpid if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax & s2id == ""
* 477 observations
list wpid wpname_fr s1name death date if /*
	*/ birth == . & death != . & date >= death & s1id != "" & death == s1birthmax & s2id == ""
* Make correction to first successor ID
replace wpid1 = s1id if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax & s2id == ""
replace consist = 1 if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax & s2id == ""

* 1.2.1.2 Observations with meetings after abolishment date, with successor WP,
* successor WP replaces previous WP without delay, and there is a second successor
tab wpid if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax & s2id != ""
* 165 observations
list wpid wpname_fr s1name death date if /*
	*/ birth == . & death != . & date >= death & s1id != "" & death == s1birthmax & s2id != ""
tab wpid if birth == . & death != . & date >= death & s1id != "" /*
	*/ & death == s1birthmax & s2id != "" & s1death == s2birthmax
* All second successor WPs replace previous WP without delay

* 1.2.1.2.1 Observations with meetings after abolishment date, with successor WP,
* successor WP replaces previous WP without delay, there is a second successor, 
* but meeting takes place during first successor
tab wpid if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax /*
	*/ & s2id != "" & date < s1death
* 132 observations
list wpid wpname_fr s1name death date if /*
	*/ birth == . & death != . & date >= death & s1id != "" & death == s1birthmax & s2id != "" & date < s1death
* Make correction to first successor ID
replace wpid1 = s1id if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax /*
	*/ & s2id != "" & date < s1death
replace consist = 1 if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax /*
	*/ & s2id != "" & date < s1death
	
* 1.2.1.2.2 Observations with meetings after abolishment date, with successor WP,
* successor WP replaces previous WP without delay, and there is a second successor,
* during which the meeting takes place
tab wpid if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax /*
	*/ & s2id != "" & date >= s1death
* 33 observations
list wpid wpname_fr s1name s2name death date if /*
	*/ birth == . & death != . & date >= death & s1id != "" & death == s1birthmax & s2id != "" & date >= s1death
* Make correction to second successor ID
replace wpid1 = s2id if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax /*
	*/ & s2id != "" & date >= s1death
replace consist = 1 if birth == . & death != . & date >= death & s1id != "" & death == s1birthmax /*
	*/ & s2id != "" & date >= s1death
	
* 1.2.2 Observations with meetings after abolishment date, with successor WP,
* but successor WP is established with delay
tab wpid if birth == . & death != . & date >= death & s1id != "" & death != s1birthmax
* 79 observations
list wpid wpname_fr s1name death s1birthmin s1birthmax s1death s2birthmax date if /*
	*/ birth == . & death != . & date >= death & s1id != "" & death != s1birthmax & s2id != ""
* None of those observations has a second successor WP

* 1.2.2.1 Observations with meetings after abolishment date, with successor WP,
* successor WP is established with delay, and meeting date is after maximum successor birth date
tab wpid if birth == . & death != . & date >= death & s1id != "" & death != s1birthmax & date >= s1birthmax
tab wpid if birth == . & death != . & date >= death & s1id != "" & death != s1birthmax & date >= s1birthmax & s2id != ""
* No observations with second successor WP
list wpid wpname_fr s1name death s1birthmin s1birthmax s1death s2birthmax date if /*
	*/ birth == . & death != . & date >= death & s1id != "" & death != s1birthmax & date >= s1birthmax
* 37 observations have dates after the maximum successor birth date
* Make correction to first successor ID
replace wpid1 = s1id if birth == . & death != . & date >= death & s1id != "" & death != s1birthmax /*
	*/ & date > s1birthmax
replace consist = 1 if birth == . & death != . & date >= death & s1id != "" & death != s1birthmax /*
	*/ & date > s1birthmax

* 1.2.2.2 Observations with meetings after abolishment date, with successor WP,
* successor WP is established with delay, and meeting date is before maximum successor birth date
tab wpid if birth == . & death != . & date >= death & s1id != "" & death != s1birthmax & date < s1birthmax
list wpid wpname_fr s1name death s1birthmax date if /*
	*/ birth == . & death != . & date >= death & s1id != "" & death != s1birthmax & date < s1birthmax
* 42 observations have dates before the maximum successor birth date, but that's because the successor WP
* has a missing as establishment date (i.e. it has been established before 1999, and the other WP is
* merged or subsumed at a later point).
* Make correction to first successor ID
replace wpid1 = s1id if birth == . & death != . & date >= death & s1id != "" & death != s1birthmax
replace consist = 1 if birth == . & death != . & date >= death & s1id != "" & death != s1birthmax


* Check WPs with post-1999 establishment date but without abolishment date
**************************************************************************

* 2. WPs that were created after 1999 but were never abolished
tab wpid if birth != . & death == .
* 10,833 observations

* Observations with meetings after establishment date
tab wpid if birth != . & death == . & date >= birth
* 9,804 observations
replace consist = 1 if birth != . & death == . & date >= birth

* Observations with meetings before establishment date
tab wpid if birth != . & death == . & date < birth
* 1,029
groups wpname if birth != . & death == . & date < birth
list wpid wpname_fr date birth if birth != . & death == . & date < birth & wpid != "C40a"

* Make corrections after manual review
* Before 2003, Working Party on Statistics referred to working party in Internal Market sector
replace wpid1 = "K03" if wpid == "B19" & date < d(03mar2003)
* Before 2008, own resources was sub-group of Financial Questions WP
replace wpid1 = "F02a" if wpid == "F14" & date < d(08jul2008)
* In 2000, there was a separate high level group on taxation
replace wpid1 = "F10" if wpid == "F11" & date >= d(01feb2000) & date < d(5jul2000)
* Pre-1999 high-level group on tax policy
replace wpid1 = "F00" if wpid == "F11" & date < d(01feb2000)
* JAI external relations in 2000 has nothing to do with WP founded in 2008
replace wpid1 = "G00" if wpid == "G30" & date < d(01jan2008)
* Genetically modified organisms were dealt with by Friends of the Presidency/Attaches before 2006
replace wpid1 = "A10" if wpid == "H07i" & date < d(24apr2006) & regexm(wpname_fr, "PRESIDENCE")
replace wpid1 = "H46" if wpid == "H07i" & date < d(24apr2006) & regexm(wpname_fr, "ATTACHES")
* Predecessor working party was in charge of beekeeping and honey until 2003
replace wpid1 = "H10" if wpid == "H08f" & date < d(04mar2003)
* Code joint meeting of telecommunications and information society group as such
expand 2 if wpid == "L03" & date < d(01feb2003) 
replace wpid1 = "L01" if wpid == "L03" & date < d(01feb2003) & _n == _N
replace wpid1 = "M01" if wpid == "L03" & date < d(01feb2003)
* Correct pre-1999 EQUIPEMENTS SOUS PRESSION
replace wpid1 = "K00" if wpid == "K08u" & date < d(01feb1997)
* Correct error in coding of working party
replace wpid1 = "C40" if wpid == "H23d" & wpname_fr == "MILEX 08 / CPM"

* Check corrections
groups wpid if birth != . & death == . & date < birth & wpid == wpid1
list wpid wpid1 wpname_fr birth date if birth != . & death == . & date < birth & wpid != "C40a" & wpid == wpid1
list wpid wpid1 wpname_fr birth date if birth != . & death == . & date < (birth-180) & wpid != "C40a" & wpid == wpid1
replace consist = 1 if birth != . & death == . & date < birth
* 1030 changes (1 more because of expansion of dataset by one observation)


* Check WPs with post-1999 establishment date and abolishment date
******************************************************************

* 3. WPs that were created after 1999 and abolised before end of study period
tab wpid if birth != . & death != .
* 1,144 observations

* Observations with meetings between establishment and abolishment date
tab wpid if birth != . & death != . & date >= birth & date < death
* 834 observations
replace consist = 1 if birth != . & death != . & date >= birth & date < death

* 3.1 Observations with meetings before establishment date
tab wpid if birth != . & death != . & date < birth
* 152 observations
list wpid wpname_fr birth date if birth != . & death != . & date < birth

* Replace pre-1999 observations with general sector code
tab wpid if birth != . & death != . & date < birth & date < (birth - 180)
* 63 observations
list wpid wpid1 wpname_fr wpname birth date if birth != . & death != . & date < birth & date < (birth - 180)
replace wpid1 = "F00" if wpid == "F06k" & date < d(26nov1999)
replace wpid1 = "C00" if wpid == "F13" & regexm(wpname_fr, "SUD - EST")
replace wpid1 = "F00" if wpid == "F13" & date < d(26nov1999)
replace wpid1 = "K00" if wpid == "K05c" & date < d(26nov1999)
replace wpid1 = "K00" if wpid == "K05d" & date < d(26nov1999)
replace wpid1 = "K00" if wpid == "K05e" & date < d(26nov1999)
replace wpid1 = "K00" if wpid == "K14" & date < d(26nov1999)
replace wpid1 = "R00" if wpid == "R01a" & date < d(26nov1999)
replace wpid1 = "R00" if wpid == "R06a" & date < d(26nov1999)

* Correct Environment and Development group coding
list wpid wpname_fr wpname birth date if birth != . & death != . & date < birth & date < (birth - 180) & wpid == wpid1
replace wpid1 = "R06a" if wpid == "R01a" & date > d(26nov1999) & date < (birth - 180)
list wpid wpname_fr wpname birth date if birth != . & death != . & date < birth & wpid == wpid1 & date < (birth - 180)
* All observations recoded
replace consist = 1 if birth != . & death != . & date < birth & date < birth - 180

* Check remaining observations
tab wpid if birth != . & death != . & date < birth & date >= (birth - 180)
* 89 observations
list wpid wpname_fr wpname birth date if birth != . & death != . & date < birth & date >= (birth - 180)
* All meetings took place just before the recorded establishment of the working party
replace consist = 1 if birth != . & death != . & date < birth & (date >= (birth - 180) | wpid != wpid1)

* 3.2 Observations with meetings after abolishment date
tab wpid if birth != . & death != . & date >= death
* 158 observations
list wpid wpname_fr wpname death date if birth != . & death != . & date > death

* 3.2.1 Observations with meetings after abolishment date, but no successor WP
tab wpid if birth != . & death != . & date >= death & s1id == ""
* 71 observations
list wpid wpname_fr wpname death date if birth != . & death != . & date >= death & s1id == ""
* Most of them relate to the climate change sub-group of the international environment WP 
* or three sub-groups of the Financial Services WP, all which have officially been abolished, but meetings still
* take place under the respective denominations
* No corrections required
replace consist = 1 if birth != . & death != . & date >= death & s1id == ""

* 3.2.2 Observations with meetings after abolishment date that have a successor WP
tab wpid if birth != . & death != . & date >= death & s1id != ""
* 87 observations
list wpid wpname_fr wpname death date if birth != . & death != . & date >= death & s1id != ""
* Only five groups, one meeting is clearly a meeting of the Ad hoc WP on Joint EU-Africa strategy, although it should have
* been merged with the Africa WP
replace consist = 1 if birth != . & death != . & date >= death & s1id != "" & wpid == "C48"
* 1 observation
* Correct working party codes for working parties that had a successor working party (post-2010)
replace wpid1 = "B27" if wpid == "B12a" & birth != . & death != . & date >= death & s1id != ""
replace consist = 1 if wpid == "B12a" & birth != . & death != . & date >= death & s1id != ""
* 5 observations
replace wpid1 = "K08s" if wpid == "K08k" & birth != . & death != . & date >= death & s1id != ""
replace consist = 1 if wpid == "K08k" & birth != . & death != . & date >= death & s1id != ""
* 2 observations
* The other group is the Ad hoc WP on drafting the accession treaty
tab wpid if birth != . & death != . & date >= death & s1id != "" & wpid == "B18"
list wpid date death s1id s1birthmax s1death s2id s2birthmax s2death if /*
	*/ birth != . & death != . & date >= death & s1id != "" & wpid == "B18"
replace wpid1 = s1id if birth != . & death != . & date >= death & s1id != "" & wpid == "B18" & /*
	*/ date >= s1birthmax & date < s1death
replace wpid1 = s2id if birth != . & death != . & date >= death & s1id != "" & wpid == "B18" & /*
	*/ date >= s2birthmax & date < d(01jan2011)
replace wpid1 = "B25" if birth != . & death != . & date >= death & s1id != "" & wpid == "B18" & /*
	*/ date >= s2birthmax & date >= d(01jan2011)
list wpid wpid1 date death s1id s1birthmax s1death s2id s2birthmax if /*
	*/ birth != . & death != . & date > death & s1id != "" & wpid == "B18"
list wpid date death s1id s1birthmax s1death s2id s2birthmax if /*
	*/ birth != . & death != . & date > death & s1id != "" & wpid == "B18" & wpid == wpid1
replace consist = 1 if birth != . & death != . & date >= death & s1id != "" & wpid == "B18"
* 78 observations
* The fifth group is the interim political and security committee, which met on the same day
* as the Council decision setting up the political and security committee was adopted
* Keep original coding for this working party
replace consist = 1 if birth != . & death != . & date >= death & s1id != "" & wpid == "C38"
* 1 observation

* Check that all observations have been checked
tab consist, m
drop consist


********************************************************************
* Merge meeting data with WP lifecycle data based on corrected WP ID
********************************************************************

* Prepare data for second merge 
replace wpid = wpid1
* 940 codes changed
order merge1, before(wprunno)
drop wprunno-wpid1

* Merge with successor working party information
merge m:1 wpid using "Data management\wpar-management07-wplife-prepare02.dta"
rename _merge merge2
tab merge2, m
tab merge2 meets, m
list wpid wpname if merge2 == 3 & meets == 0
list wpid wpname if merge2 == 2 & meets == 1
* 64 working groups did not meet
list wpid wpname death if merge2 == 2

* Generate indicator variable for groups that are not official working parties
* Either pre-1999 groups or groups that meet in the Council but are not part of the list
* This also includes attache meetings and meetings that could not be linked to a specific group
* in the policy area
generate nowp = 1
label var nowp "Official WP (yesno)"
label val nowp yesno
replace nowp = 0 if /*
	*/ wpid == "B28" | /*
	*/ wpid == "C50" | /*
	*/ wpid == "C51" | /*
	*/ wpid == "G37" | /*
	*/ wpid == "G38" | /*
	*/ wpid == "H46" | /*
	*/ wpid == "I04" | /*
	*/ wpid == "Q05" | /*
	*/ wpid == "B00" | /*
	*/ wpid == "C00" | /*
	*/ wpid == "D00" | /*
	*/ wpid == "F00" | /*
	*/ wpid == "G00" | /*
	*/ wpid == "H00" | /*
	*/ wpid == "K00" | /*
	*/ wpid == "L00" | /*
	*/ wpid == "R00"
tab nowp merge2, m	
* All 'real' working party meetings matched!
	
* Code policy area for non-matched meeting observations
* (These are either coded in terms of the general sector or related to groups that are not part 
* of the formal list)
groups wpid wpname_fr if merge1 == 1, show(f F P) miss
replace policy = 1 if wpid == "B00"
replace policy = 2 if wpid == "C00"
replace policy = 3 if wpid == "D00"
replace policy = 5 if wpid == "F00"
replace policy = 6 if wpid == "G00"
replace policy = 7 if wpid == "H00"
replace policy = 9 if wpid == "K00"
replace policy = 10 if wpid == "L00"
replace policy = 15 if wpid == "R00"

* AD HOC POSEIDOM - POSEICAN - POSEIMA, only met before 1999
list wpname_fr wpname date policy if wpid == "B28"
replace policy = 1 if wpid == "B28" 

* South Africa Working Party (pre-1999)
list wpname_fr wpname date if wpid == "C50"
drop if wpid == "C50" & regex(wpname_fr, "CNUCED IX") /* Drop UNCTAD conference */
* Drop 13 observations
drop if wpid == "C50" & date > d(26nov1999) 		  /* Drop cooperation talks with South Africa after 1999 */
* Drop 3 observations
replace policy = 2 if wpid == "C50"

* JAI Counsellors (not an offical working party)
list wpname_fr wpname date if wpid == "G37"
replace policy = 6 if wpid == "G37"

* Friends of Visa Information System (not an official working party)
list wpname_fr wpname date if wpid == "G38"
replace policy = 6 if wpid == "G38"

* Agri Counsellors (not an offical working party)
list wpname_fr wpname date if wpid == "H46"
replace policy = 7 if wpid == "H46"

* Fisheries Counsellors (not an offical working party)
list wpname_fr wpname date if wpid == "I04"
replace policy = 8 if wpid == "I04"

* Transport Counsellors and other generic transport meetings
list wpname_fr wpname date if wpid == "Q05"
replace policy = 14 if wpid == "Q05"
tab merge2, m
tab policy merge2, m
* All 2,266 observations that could not be merged 
* have been assigned a general policy code

* Drop working groups without meetings and meeting indicator variable
tab meets merge2, m
list wpname_fr wpname date if meets == 1 & merge2 == 2
drop if merge2 == 2
tab meets, m
* Missings refer to observations without a specific working party code
tab wpid if meets == .
drop meets

* Keep only relevant variables
drop merge1 merge2 wpcodeno rno wprunno

* Rename variables
rename birthdate wpbirthdate
rename birth wpbirth
rename deathdate wpdeathdate
rename death wpdeath
rename sectornew wpsecnew
rename sectorold wpsecold
rename lcensor wplcensor
rename cod wpcod
rename succwp wpsuccwp
rename succwp2 wpsuccwp2
rename policy wppolicy
rename birthdoc wpbirthdoc
rename comment wpcomment
rename wpname_fr name_fr
rename wpname_orig name_orig
rename s1sectornew s1secnew
rename s1sectorold s1secold
rename s2sectornew s2secnew
rename s2sectorold s2secold

* Order variables
order wp* s1* s2*, after(nowp)
order wpid wppolicy wpsecold wpsecnew wpname, after(nowp)
order s1id s1policy s1secold s1secnew s1name, after(wpcomment)
order s2id s2policy s2secold s2secnew s2name, after(s1cod)


* Save dataset
compress
sort date slot wpid
des, s
* 72,277 observations
save "Data management\wpar-management08-merge.dta", replace

* Close log
log close
exit
